"""
Integration tests for delete cascade behavior.

Tests end-to-end deletion scenarios verifying proper cleanup
of all related records across the database.
"""

import pytest
import uuid

from sqlalchemy import create_engine, event
from sqlalchemy.orm import sessionmaker

from local_deep_research.database.models.library import (
    Document,
    DocumentBlob,
    DocumentChunk,
    DocumentCollection,
    Collection,
    CollectionFolder,
    SourceType,
    RAGIndex,
    EmbeddingProvider,
)
from local_deep_research.database.models.base import Base


@pytest.fixture
def test_engine():
    """Create an in-memory SQLite database for testing."""
    engine = create_engine("sqlite:///:memory:")

    # Enable foreign key enforcement for SQLite (required for CASCADE to work)
    @event.listens_for(engine, "connect")
    def set_sqlite_pragma(dbapi_connection, connection_record):
        cursor = dbapi_connection.cursor()
        cursor.execute("PRAGMA foreign_keys=ON")
        cursor.close()

    Base.metadata.create_all(engine)
    return engine


@pytest.fixture
def test_session(test_engine):
    """Create a session for testing."""
    Session = sessionmaker(bind=test_engine)
    session = Session()
    yield session
    session.close()


@pytest.fixture
def source_type(test_session):
    """Create a source type for testing."""
    st = SourceType(
        id=str(uuid.uuid4()),
        name="user_upload",
        display_name="User Upload",
    )
    test_session.add(st)
    test_session.commit()
    return st


def create_full_document(
    session, source_type, collection, with_blob=True, with_chunks=True
):
    """
    Create a document with all related records:
    - DocumentBlob (optional)
    - DocumentCollection link
    - DocumentChunks (optional)
    - RagDocumentStatus
    """
    doc_id = str(uuid.uuid4())
    collection_name = f"collection_{collection.id}"

    # Create document
    doc = Document(
        id=doc_id,
        source_type_id=source_type.id,
        document_hash=f"hash_{doc_id[:8]}",
        file_size=1024,
        file_type="pdf",
        title=f"Document {doc_id[:8]}",
        filename=f"doc_{doc_id[:8]}.pdf",
        storage_mode="database" if with_blob else "none",
        text_content="Test document content for full cascade test.",
    )
    session.add(doc)
    session.flush()

    # Create blob
    if with_blob:
        blob = DocumentBlob(
            document_id=doc_id,
            pdf_binary=b"PDF content " * 100,
            blob_hash=f"blob_hash_{doc_id[:8]}",
        )
        session.add(blob)

    # Create collection link
    doc_collection = DocumentCollection(
        document_id=doc_id,
        collection_id=collection.id,
        indexed=with_chunks,
        chunk_count=3 if with_chunks else 0,
    )
    session.add(doc_collection)

    # Create chunks
    chunks = []
    if with_chunks:
        for i in range(3):
            chunk = DocumentChunk(
                chunk_hash=f"chunk_{doc_id[:8]}_{i}_{uuid.uuid4().hex[:6]}",
                source_type="document",
                source_id=doc_id,
                collection_name=collection_name,
                chunk_text=f"Chunk {i} of document {doc_id[:8]}",
                chunk_index=i,
                start_char=i * 100,
                end_char=(i + 1) * 100,
                word_count=10,
                embedding_id=str(uuid.uuid4()),
                embedding_model="all-MiniLM-L6-v2",
                embedding_model_type=EmbeddingProvider.SENTENCE_TRANSFORMERS,
            )
            session.add(chunk)
            chunks.append(chunk)

    session.commit()
    return doc, chunks


class TestFullDocumentCascade:
    """
    Integration tests for complete document deletion cascade.

    Create document with:
    - DocumentBlob
    - DocumentCollection (multiple)
    - DocumentChunk (multiple)
    - RagDocumentStatus

    Delete document, verify ALL related records cleaned up.
    """

    def test_document_delete_full_cascade(self, test_session, source_type):
        """Complete cascade cleanup when deleting a document."""
        # Create two collections
        collection1 = Collection(
            id=str(uuid.uuid4()),
            name="Collection 1",
        )
        collection2 = Collection(
            id=str(uuid.uuid4()),
            name="Collection 2",
        )
        test_session.add_all([collection1, collection2])
        test_session.commit()

        # Create document with blob
        doc_id = str(uuid.uuid4())
        doc = Document(
            id=doc_id,
            source_type_id=source_type.id,
            document_hash=f"hash_{doc_id[:8]}",
            file_size=2048,
            file_type="pdf",
            title="Full Cascade Test Document",
            storage_mode="database",
            text_content="Content for cascade test.",
        )
        test_session.add(doc)
        test_session.flush()

        # Add blob
        blob = DocumentBlob(
            document_id=doc_id,
            pdf_binary=b"PDF content for cascade test" * 50,
        )
        test_session.add(blob)

        # Add to both collections
        for coll in [collection1, collection2]:
            link = DocumentCollection(
                document_id=doc_id,
                collection_id=coll.id,
                indexed=True,
                chunk_count=2,
            )
            test_session.add(link)

        # Add chunks for both collections
        for coll in [collection1, collection2]:
            collection_name = f"collection_{coll.id}"
            for i in range(2):
                chunk = DocumentChunk(
                    chunk_hash=f"chunk_{coll.id[:8]}_{i}_{uuid.uuid4().hex[:6]}",
                    source_type="document",
                    source_id=doc_id,
                    collection_name=collection_name,
                    chunk_text=f"Chunk {i} in {coll.name}",
                    chunk_index=i,
                    start_char=i * 50,
                    end_char=(i + 1) * 50,
                    word_count=5,
                    embedding_id=str(uuid.uuid4()),
                    embedding_model="test-model",
                    embedding_model_type=EmbeddingProvider.SENTENCE_TRANSFORMERS,
                )
                test_session.add(chunk)

        test_session.commit()

        # Verify everything exists
        assert test_session.query(Document).get(doc_id) is not None
        assert (
            test_session.query(DocumentBlob)
            .filter_by(document_id=doc_id)
            .first()
            is not None
        )
        assert (
            test_session.query(DocumentCollection)
            .filter_by(document_id=doc_id)
            .count()
            == 2
        )
        assert (
            test_session.query(DocumentChunk)
            .filter_by(source_id=doc_id, source_type="document")
            .count()
            == 4
        )

        # === PERFORM DELETION ===
        # Step 1: Delete chunks (no FK, manual cleanup required)
        test_session.query(DocumentChunk).filter(
            DocumentChunk.source_id == doc_id,
            DocumentChunk.source_type == "document",
        ).delete(synchronize_session=False)

        # Step 2: Delete document (CASCADE handles blob and collection links)
        document = test_session.query(Document).get(doc_id)
        test_session.delete(document)
        test_session.commit()

        # === VERIFY COMPLETE CLEANUP ===
        assert test_session.query(Document).get(doc_id) is None, (
            "Document deleted"
        )
        assert (
            test_session.query(DocumentBlob)
            .filter_by(document_id=doc_id)
            .first()
            is None
        ), "Blob deleted (CASCADE)"
        assert (
            test_session.query(DocumentCollection)
            .filter_by(document_id=doc_id)
            .count()
            == 0
        ), "Collection links deleted (CASCADE)"
        assert (
            test_session.query(DocumentChunk)
            .filter_by(source_id=doc_id, source_type="document")
            .count()
            == 0
        ), "Chunks deleted (manual)"

        # Collections should still exist
        assert test_session.query(Collection).get(collection1.id) is not None
        assert test_session.query(Collection).get(collection2.id) is not None


class TestFullCollectionCascade:
    """
    Integration tests for complete collection deletion cascade.

    Create collection with:
    - Multiple documents
    - DocumentChunks
    - CollectionFolder
    - RAGIndex

    Delete collection, verify cleanup but documents preserved.
    """

    def test_collection_delete_full_cascade(self, test_session, source_type):
        """Complete cascade cleanup when deleting a collection."""
        # Create collection with all features
        collection = Collection(
            id=str(uuid.uuid4()),
            name="Full Featured Collection",
            embedding_model="all-MiniLM-L6-v2",
            embedding_model_type=EmbeddingProvider.SENTENCE_TRANSFORMERS,
            embedding_dimension=384,
        )
        test_session.add(collection)
        test_session.flush()

        collection_name = f"collection_{collection.id}"

        # Add folder
        folder = CollectionFolder(
            collection_id=collection.id,
            folder_path="/tmp/test_collection_folder",
            recursive=True,
            file_count=5,
        )
        test_session.add(folder)

        # Add RAG index
        rag_index = RAGIndex(
            collection_name=collection_name,
            embedding_model="all-MiniLM-L6-v2",
            embedding_model_type=EmbeddingProvider.SENTENCE_TRANSFORMERS,
            embedding_dimension=384,
            index_path=f"/tmp/{collection_name}/index",
            index_hash=f"index_hash_{collection.id[:8]}",
            chunk_size=500,
            chunk_overlap=50,
            chunk_count=10,
            total_documents=2,
        )
        test_session.add(rag_index)

        # Create documents
        doc_ids = []
        for i in range(2):
            doc_id = str(uuid.uuid4())
            doc = Document(
                id=doc_id,
                source_type_id=source_type.id,
                document_hash=f"doc_hash_{doc_id[:8]}",
                file_size=512 * (i + 1),
                file_type="pdf",
                title=f"Collection Doc {i + 1}",
                text_content=f"Content {i + 1}",
            )
            test_session.add(doc)
            test_session.flush()

            link = DocumentCollection(
                document_id=doc_id,
                collection_id=collection.id,
                indexed=True,
                chunk_count=5,
            )
            test_session.add(link)

            # Add chunks
            for j in range(5):
                chunk = DocumentChunk(
                    chunk_hash=f"chunk_{doc_id[:6]}_{j}_{uuid.uuid4().hex[:4]}",
                    source_type="document",
                    source_id=doc_id,
                    collection_name=collection_name,
                    chunk_text=f"Chunk {j} of doc {i + 1}",
                    chunk_index=j,
                    start_char=j * 20,
                    end_char=(j + 1) * 20,
                    word_count=3,
                    embedding_id=str(uuid.uuid4()),
                    embedding_model="all-MiniLM-L6-v2",
                    embedding_model_type=EmbeddingProvider.SENTENCE_TRANSFORMERS,
                )
                test_session.add(chunk)

            doc_ids.append(doc_id)

        test_session.commit()

        # Verify everything exists
        assert test_session.query(Collection).get(collection.id) is not None
        assert (
            test_session.query(CollectionFolder)
            .filter_by(collection_id=collection.id)
            .first()
            is not None
        )
        assert (
            test_session.query(RAGIndex)
            .filter_by(collection_name=collection_name)
            .first()
            is not None
        )
        assert (
            test_session.query(DocumentCollection)
            .filter_by(collection_id=collection.id)
            .count()
            == 2
        )
        assert (
            test_session.query(DocumentChunk)
            .filter_by(collection_name=collection_name)
            .count()
            == 10
        )

        # === PERFORM DELETION ===
        # Step 1: Delete chunks (no FK, manual cleanup)
        test_session.query(DocumentChunk).filter_by(
            collection_name=collection_name
        ).delete(synchronize_session=False)

        # Step 2: Delete RAG index
        test_session.query(RAGIndex).filter_by(
            collection_name=collection_name
        ).delete(synchronize_session=False)

        # Step 3: Delete collection (CASCADE handles folders, links)
        coll = test_session.query(Collection).get(collection.id)
        test_session.delete(coll)
        test_session.commit()

        # === VERIFY CLEANUP ===
        assert test_session.query(Collection).get(collection.id) is None, (
            "Collection deleted"
        )
        assert (
            test_session.query(CollectionFolder)
            .filter_by(collection_id=collection.id)
            .first()
            is None
        ), "Folder deleted (CASCADE)"
        assert (
            test_session.query(RAGIndex)
            .filter_by(collection_name=collection_name)
            .first()
            is None
        ), "RAG index deleted"
        assert (
            test_session.query(DocumentCollection)
            .filter_by(collection_id=collection.id)
            .count()
            == 0
        ), "Links deleted (CASCADE)"
        assert (
            test_session.query(DocumentChunk)
            .filter_by(collection_name=collection_name)
            .count()
            == 0
        ), "Chunks deleted"

        # Documents should still exist!
        for doc_id in doc_ids:
            doc = test_session.query(Document).get(doc_id)
            assert doc is not None, f"Document {doc_id[:8]} preserved"


class TestOrphanDetection:
    """Tests to verify no orphaned records after deletions."""

    def test_no_orphaned_chunks_after_document_delete(
        self, test_session, source_type
    ):
        """Verify no orphaned chunks remain after proper document deletion."""
        collection = Collection(id=str(uuid.uuid4()), name="Test")
        test_session.add(collection)
        test_session.commit()

        doc, chunks = create_full_document(
            test_session, source_type, collection, with_chunks=True
        )
        doc_id = doc.id

        # Proper deletion: chunks first, then document
        test_session.query(DocumentChunk).filter(
            DocumentChunk.source_id == doc_id,
            DocumentChunk.source_type == "document",
        ).delete(synchronize_session=False)

        test_session.delete(doc)
        test_session.commit()

        # Check for orphans
        orphaned_chunks = (
            test_session.query(DocumentChunk)
            .filter_by(source_id=doc_id, source_type="document")
            .all()
        )
        assert len(orphaned_chunks) == 0, "No orphaned chunks should exist"

    def test_no_orphaned_blobs_after_document_delete(
        self, test_session, source_type
    ):
        """Verify no orphaned blobs remain after document deletion."""
        collection = Collection(id=str(uuid.uuid4()), name="Test")
        test_session.add(collection)
        test_session.commit()

        doc, _ = create_full_document(
            test_session,
            source_type,
            collection,
            with_blob=True,
            with_chunks=False,
        )
        doc_id = doc.id

        # Delete document (CASCADE should handle blob)
        test_session.delete(doc)
        test_session.commit()

        # Check for orphan blobs
        orphaned_blob = (
            test_session.query(DocumentBlob)
            .filter_by(document_id=doc_id)
            .first()
        )
        assert orphaned_blob is None, "No orphaned blob should exist"

    def test_no_orphaned_links_after_collection_delete(
        self, test_session, source_type
    ):
        """Verify no orphaned DocumentCollection links after collection deletion."""
        collection = Collection(id=str(uuid.uuid4()), name="Test")
        test_session.add(collection)
        test_session.commit()

        doc, _ = create_full_document(
            test_session, source_type, collection, with_chunks=False
        )
        collection_id = collection.id

        # Delete collection
        test_session.delete(collection)
        test_session.commit()

        # Check for orphan links
        orphaned_links = (
            test_session.query(DocumentCollection)
            .filter_by(collection_id=collection_id)
            .all()
        )
        assert len(orphaned_links) == 0, "No orphaned links should exist"

        # Document should still exist
        assert test_session.query(Document).get(doc.id) is not None
